{
  "_class_name": "AutoencoderKL_imgtovideo",
  "ch": 128,
  "out_ch": 3,
  "in_channels": 3,
  "resolution": 256,
  "z_channels": 4,
  "embed_dim": 4,
  "attn_resolutions": [],
  "ch_mult": [
    1,
    2,
    4,
    4
  ],
  "num_res_blocks": 2,
  "double_z": "True",

  "seed": 666,
  "batch_size": 1,
  "target_fps": 8,
  "max_frames": 32,
  "latent_hei": 32,
  "latent_wid": 56,
  "resolution_crop": 448,
  "vit_resolution": 224,
  "vit_mean": [
    0.48145466,
    0.4578275,
    0.40821073
  ],
  "vit_std": [
    0.26862954,
    0.26130258,
    0.27577711
  ],
  "beta_type": "linear",
  "num_timesteps": 1000,
  "init_beta": 0.00085,
  "last_beta": 0.0120,
  "mean_type": "v",
  "var_type": "fixed_small",
  "loss_type": "mse",
  "noise_strength": 0.1,
  "input_dim": 1024,
  "ddim_timesteps": 50,
  "guide_scale": 3.0,
  "scale_factor": 0.18215,
  "decoder_bs": 8

}
